/* Create CPS data for final version of paper */
/* Includes merge with the instrument */

/* Use the semicolon as the command delimiter for the rest of the file */
#delimit ;
/* NOTE(review): -set memory- is believed to be obsolete in recent Stata */
/* releases; retained so the file still runs on older versions.          */
set memory 2000m;
set more off;

/* Open the data file */
/* -use- discards data already in memory with the option -clear-; */
/* -replace- is not an option of -use- and stops the do-file.     */
use "cps_ss_8013", clear;
sum;

/* Restrict to survey years 1980 through 1999 */
keep if year>=1980&year<=1999;

/* Household size in adults, following EGP.                              */
/* Their definition of an adult is anyone aged 19 or older.              */

/* Household identifiers differ by source:                               */
/*   IPUMS-CPS   : year-month-serial                                     */
/*   regular CPS : h_year-h_month-ph_seq                                 */
/* h_year and h_month have already been remapped to year and month.      */
/* serial is a code IPUMS built itself and is NOT the same as ph_seq,    */
/* but overwriting serial with ph_seq is fine when all that is needed is */
/* a unique household id.                                                */
replace serial = ph_seq if year >= 1988;

/* Adult marker: 1 for adults, missing otherwise, so it can be summed */
generate adult = 1 if age >= 19;

/* Number of adults in each year-month-serial household */
egen nadults = sum(adult), by(year month serial);
tabulate nadults, missing;


/* Build the adjusted SS allocation flag now, before spouse data are   */
/* attached, so the spouse's adjusted flag can be carried along too.   */
tabulate fl_665, missing;

/* Code 3 wherever fl_665 is above 1 and not system-missing; otherwise */
/* keep the original qincss value.                                     */
generate qincssnew = cond(fl_665 > 1 & fl_665 != ., 3, qincss);


/* Attach spouse characteristics to married men who are the household    */
/* head or the spouse of the head, as in EGP.                            */
/* Two sources are stacked: IPUMS-CPS for 1980-87 and NBER CPS for       */
/* 1988-2013, each with its own identifiers:                             */
/*   spouse pointer : sploc  (IPUMS-CPS)    a_spouse (NBER CPS)          */
/*   own person id  : pernum (IPUMS-CPS)    a_lineno (NBER CPS)          */
/* In the regular CPS, ph_seq is a unique household id, a_lineno is the  */
/* person's within-household identifier and a_spouse is the a_lineno of  */
/* that person's spouse.                                                 */
/* Only the spouse of the household head has to be located here.         */
/* See http://www.psc.isr.umich.edu/dis/data/ref/personID.html           */

/* Men who are head of household or spouse of the head */
generate malehead = (sex == 1 & relate <= 201);

/* Spouse pointer of that man, read from the source covering the year */
generate tempa = sploc if malehead == 1 & inrange(year, 1980, 1987);
replace tempa = a_spouse if malehead == 1 & year >= 1988;

/* Spread the pointer across the household and count how many pointers */
/* each household holds                                                */
egen spouseid = min(tempa), by(year month serial);
egen numspouse = count(tempa), by(year month serial);
tabulate spouseid, missing;
tabulate numspouse, missing; /* If greater than 1, we have a problem */

/* The spouse is the person whose own id equals the household pointer */
generate byte spouse = (spouseid == pernum & inrange(year, 1980, 1987))
                     | (spouseid == a_lineno & year >= 1988);
tabulate spouse, missing;

/* For each attribute: take the spouse's value, spread it across the */
/* household, then blank it for everyone who is not the male head    */
foreach v in age educ incss qincssnew {;
    generate `v'temp = `v' if spouse == 1;
    egen `v'spouse = min(`v'temp), by(year month serial);
    replace `v'spouse = . if malehead != 1;
};

tabulate marst if malehead == 1;
summarize year agespouse educspouse incssspouse qincssnewspouse;

/* Remove the working variables */
drop malehead tempa spouseid numspouse spouse
     agetemp eductemp incsstemp qincssnewtemp;


/* EGP family definition:                                                 */
/*   - the household head, the head's spouse and any children under 19    */
/*     form one family;                                                   */
/*   - every other household member is a family of their own.             */
/* Within a family the SS beneficiary is chosen in this order:            */
/*   1. a male aged 65 or older;                                          */
/*   2. failing that, a never-married female aged 65 or older;            */
/*   3. failing that, a widowed or divorced female aged 62 or older,      */
/*      whose spouse is assumed to have been three years older.           */
/* In effect this keeps all of groups 1-3. Relationship-to-head variables */
/* could confirm this, but it should not be an issue.                     */

keep if (age >= 65 & (sex == 1 | (sex == 2 & marst == 6)))
      | (age >= 62 & sex == 2 & inlist(marst, 4, 5));

summarize;
tabulate nadults;


/* Year of birth of the household SS recipient */
generate yob = year - age - 1;

/* Two groups of recipients:                                             */
/*   own    - males 65+ and never-married females 65+: own values used   */
/*   widdiv - widowed/divorced females 62+: the head is assumed to be    */
/*            three years older, so born three years earlier             */
local own    "(sex==1&age>=65)|(sex==2&age>=65&marst==6)";
local widdiv "sex==2&age>=62&(marst==4|marst==5)";

/* Head's year of birth and age; missing if neither group applies */
generate yobh = cond(`widdiv', yob - 3, cond(`own', yob, .));
generate ageh = cond(`widdiv', age + 3, cond(`own', age, .));

summarize year yob yobh ageh;


/* To match EGP, only recipients born 1900 through 1933 enter the analysis */
keep if inrange(yobh, 1900, 1933);

summarize year yob yobh;




/* Diagnostics on missing / imputed Social Security income.               */
/* qincssnew was created earlier: it is set to 3 whenever fl_665>1,       */
/* REGARDLESS of the SS payment amount.                                   */
tabulate fl_665, missing;

tabulate qincssnewspouse, missing;

tabulate year qincssnew, row;

tabulate qincssnew qincssnewspouse, missing;

/* Indicator for incss above zero, tabulated by year, head's age and */
/* marital status                                                    */
generate hasincss = (incss > 0);
tabulate year hasincss, row;
tabulate ageh hasincss, row;
tabulate marst hasincss, row;



/* Attach the Englehardt et al (2005) instrument.                        */
/* The EG (2005) volume-chapter series for 1885-1930 is used: one value  */
/* per birth cohort that does NOT vary by education, so the match key is */
/* the head's year of birth alone.                                       */
sort yobh;

merge yobh using "eg_2005_inst.dta", keep(inst_8530_all);
tabulate _merge;

/* Discard cohorts found only in the instrument file (_merge==2); */
/* master-only and matched observations are retained              */
drop if _merge == 2;
drop _merge;

/* Call the instrument INST throughout to match prior code */
rename inst_8530_all inst;


/* Create education categories */
/* educ0 is left missing for any educ value outside the ranges below */
gen educ0 = 1 if educ<=71; /* High School Dropout */
           /* May consider 71=12th, no diploma with group 2 */
replace educ0 = 2 if educ==72|educ==73; /* High school degree */
replace educ0 = 3 if educ>=80&educ<=100; /* Some college */
/* Stata treats missing as larger than any number, so educ>=110 alone */
/* would place missing educ in the college group; exclude it.         */
replace educ0 = 4 if educ>=110&educ<.; /* College Degree and up */
/* Show missing cells so unclassified values of educ are visible */
tab educ educ0, missing;


/* Indicator for living independently (indlive) and its complement       */
/* (sharlive).                                                           */
/* Women (all unmarried in this sample): independent if nadults==1.      */
/* Men who are neither head nor spouse of head (relate>201):             */
/*     independent if nadults==1.                                        */
/* Men who are head or spouse of head (relate<=201):                     */
/*     independent if nadults==1 and marst>=2, i.e. not married          */
/*         [marst>=3] or married with spouse not present [marst==2];     */
/*     independent if nadults==2 and married, spouse present [marst==1]. */

generate indlive = 0;

/* Women - all unmarried in sample */
replace indlive = 1 if sex == 2 & nadults == 1;

/* Men who are neither HH head nor spouse of HH head (relate>201) */
replace indlive = 1 if sex == 1 & nadults == 1 & relate > 201;

/* Men who are either HH head or spouse of HH head (relate<=201) */
replace indlive = 1 if sex == 1 & nadults == 1 & relate <= 201 & marst >= 2;
replace indlive = 1 if sex == 1 & nadults == 2 & relate <= 201 & marst == 1;
replace indlive = 1 if sex == 1 & nadults == 1 & relate <= 201 & marst == 1; /* TEST:NO ONE*/

tabulate indlive;
tabulate marst indlive, row;
by sex, sort: tabulate marst indlive, row;
by sex, sort: tabulate relate indlive, row;

/* Shared living is the complement of independent living */
generate sharlive = 1 - indlive;


/* Census division.                                                     */
/* The CPS state code (hg_st60) follows the 1960 Census coding; collapse */
/* it into the codes used by the IPUMS-CPS REGION variable.              */
recode hg_st60
       (11/16 = 11)
       (21/23 = 12)
       (31/35 = 21)
       (41/47 = 22)
       (51/59 = 31)
       (61/64 = 32)
       (71/74 = 33)
       (81/88 = 41)
       (91/95 = 42),
       gen(division);

/* The IPUMS REGION variable is really split by Census division, so use */
/* it directly for the 1980-1987 years                                  */
replace division = region if inrange(year, 1980, 1987);


/* Indicators used in the regressions */
generate female = (sex == 2);
generate white = (race == 100); /* Multipunch not until 2002 */


/* Total SS income: own plus spouse's (spouse value is only set for */
/* married male heads)                                              */
egen incsstot = rsum(incss incssspouse);

/* To examine SS income by birth cohort, express it in 1983 dollars:    */
/* EGP have data in 1982-1984 dollars, and 1.673 is the 1983 factor.    */
generate incsstotcpi = incsstot*(CPI99/1.673);

/* The EG (2005) instrument is in 2001 dollars; put it in 1983 dollars  */
/* too, using 1.673 as the 1983 factor and 0.967 as the 2001 factor.    */
/* NOTE(review): on a 1999 base, CPI-U annual averages appear to give   */
/* about 0.967 for 2000 dollars and about 0.941 for 2001 dollars -      */
/* confirm which year the instrument is denominated in.                 */
replace inst = inst*(0.967/1.673);

/* Own education dummies built from educ0 */
generate hsdrop   = (educ0 == 1);
generate hsgrad   = (educ0 == 2);
generate somecoll = (educ0 == 3);
generate collgrad = (educ0 == 4);

/* Marital status: EGP pool the married categories and have no separate */
/* "separated" category. For now, separated is grouped with divorced.   */
recode marst (1/2 = 1) (3/4 = 4) (5 = 5) (6 = 6), gen(marstnew);
tabulate marst marstnew;


/* Spousal education categories - account for missing value where no spouse */
/* educspouse is missing for anyone without a located spouse. Stata treats  */
/* missing as larger than any number, so the top category must exclude it   */
/* explicitly; otherwise everyone without a spouse would be coded as having */
/* a college-graduate spouse.                                               */
gen educspouse0 = 1 if educspouse<=71; /* High School Dropout */
           /* May consider 71=12th, no diploma with group 2 */
replace educspouse0 = 2 if educspouse==72|educspouse==73; /* High school degree */
replace educspouse0 = 3 if educspouse>=80&educspouse<=100; /* Some college */
replace educspouse0 = 4 if educspouse>=110&educspouse<.; /* College Degree and up */

/* Dummies are 0 when educspouse0 is missing, i.e. when there is no spouse */
gen hsdropsp = educspouse0==1;
gen hsgradsp = educspouse0==2;
gen somecollsp = educspouse0==3;
gen collgradsp = educspouse0==4;


/* Spousal age - need to make zero if no spouse */
replace agespouse = 0 if agespouse==.;

/* Final sample restrictions */
/* The sample is based on the year of birth of the head of the EGP       */
/* family. The instrument is not available through the 1933 cohort used  */
/* by EGP, so the window stops at 1930.                                  */
keep if inrange(yobh, 1900, 1930);
drop if wtsupp == 0; /* Drop those with zero weight */

/* Top-code head's age: group 91+ - see EGP footnote 11 */
replace ageh = 91 if ageh > 91;

/* The paper divides the main regressor by 1,000 (see page 364); apply   */
/* the same scaling to the instrument so both are on the same scale      */
replace incsstotcpi = incsstotcpi/1000;
replace inst = inst/1000;

/* Weighted means for comparison with their Table 1, overall and by */
/* marital status group                                             */
local tab1vars "indlive sharlive incsstotcpi hsdrop hsgrad somecoll collgrad female white";
summarize `tab1vars' [aw=wtsupp];
by marstnew, sort: summarize `tab1vars' [aw=wtsupp];

/* Keep only the analysis variables and write out the data */
keep year yobh ageh wtsupp
     sharlive incsstotcpi inst qincssnew
     educ0 division marstnew
     agespouse hsgradsp somecollsp collgradsp;
summarize;

save "cps_ss_final",replace;
